#Alexander F. Gazmararian
#afg2@princeton.edu
#January 9, 2024

#Purpose: Create indicators of counties with coal employment and those adjacent

#Load packages
library(tidyverse)
library(mapproj)
library(ggnewscale)
library(tidylog)
library(here)
library(maps)

#Load employment data
cbp <- readRDS(here("data", "inter", "cbp.rds"))
#plot coal data
counties <- maps::county.fips
mapcounties <- ggplot2::map_data("county")
usstates <- map_data("state")
counties <- counties %>% separate(polyname, into = c("state", "county"), sep = ",")

#Load data on county adjacency
adj <- read.csv("https://data.nber.org/census/geo/county-adjacency/2010/county_adjacency2010.csv")
#inspect distribution of coal employment averaged in 2005, 2006, 2007
d <- cbp %>%
  filter(year %in% c(2005,2006,2007)) %>%
  mutate(coal_prop = coal / emp_all) %>%
  group_by(fips) %>%
  summarize(coal = mean(coal_prop))
p.all <- d %>%
  ggplot() +
  geom_histogram(aes(x=coal),color="black",alpha=.5) +
  theme_bw(base_size=14) +
  labs(x="Proportion of County Coal Employment",y="Count",title="All Counties") +
  scale_y_continuous(expand = c(0,0), limits = c(0, 3100)) +
  theme(panel.grid = element_blank())
p.coal <- d %>%
  filter(coal>0) %>%
  ggplot() +
  geom_histogram(aes(x=coal),color="black",alpha=.5) +
  geom_vline(xintercept = 0.01, color = "red", size = 1.25) +
  theme_bw(base_size=14) +
  labs(x="Proportion of County Coal Employment",y="Count",title="All Counties with Coal Employment") +
  scale_y_continuous(expand = c(0,0), limits = c(0, 175)) +
  theme(panel.grid = element_blank())
p.desc <- gridExtra::grid.arrange(p.all, p.coal, ncol = 2)
##Figure C1
ggsave(
  p.desc,
  filename = here("output", "figures", "si_fig_C1_1per.png"),
  width = 6.5,
  height = 2.9,
  scale = 1.5,
  dpi = 300
)
d %>%
  filter(coal>0&coal<0.01) %>%
  summarize(
    mean = mean(coal),
    med = median(coal),
    max = max(coal),
    min = min(coal),
    sd = sd(coal)
  )
d %>%
  filter(coal>=0.01) %>%
  summarize(
    mean = mean(coal),
    med = median(coal),
    max = max(coal),
    min = min(coal),
    sd = sd(coal)
  )
#create coal proportion 
cbp$coal_prop <- with(cbp, coal / emp_all)
#create average coal employment from 2005 to 2007
coal_counties <- unique(subset(cbp, coal_prop > 0.01 & year %in%c(2005,2006,2007), select = c(fips)))
coal_counties$strata_bin <- 1
adj_merge <- merge(adj, coal_counties, by.x = "fipscounty", by.y = "fips", all.x = TRUE)
adj_merge <- subset(adj_merge, strata_bin == 1)
cbp$adjacent <- ifelse(cbp$fips %in% adj_merge$fipsneighbor, 1, 0)
summary(cbp$adjacent)
cbp <- cbp %>%
  mutate(
    adjacent = case_when(
      adjacent == 1 & coal_prop > 0.01 ~ 0,
      T ~ adjacent
    )
  )
summary(cbp$adjacent)

#create second level of adjacency
coal_counties <- unique(subset(cbp, adjacent == 1 & year == 2007, select = c(fips)))
coal_counties
coal_counties$strata_bin <- 1
adj_merge <- merge(adj, coal_counties, by.x = "fipscounty", by.y = "fips", all.x = TRUE)
adj_merge <- subset(adj_merge, strata_bin == 1)
cbp$adjacent2 <- ifelse(cbp$fips %in% adj_merge$fipsneighbor, 1, 0)
summary(cbp$adjacent2)
cbp <-
  cbp %>%
  mutate(
    adjacent2 = case_when(
      adjacent2 == 1 & coal_prop > 0.01 | adjacent == 1 ~ 0,
      T ~ adjacent2
    )
  )
#create three level variable
cbp <-
  cbp %>%
  mutate(
    adjgroup = case_when(
      coal_prop > 0.01 ~ "Coal",
      adjacent == 1 ~ "Near Neighbor",
      adjacent2 == 1 ~ "Distant Neighbor"
    ),
    adjgroup = factor(adjgroup, ordered = TRUE, levels = c("Coal", "Near Neighbor", "Distant Neighbor"))
  )
cbp_adj_out <- subset(cbp, year == 2007, select = c(fips,adjgroup))
table(cbp_adj_out$adjgroup)
#Save output
saveRDS(cbp_adj_out, here("data", "inter", "coal_adjacency.rds"))
